********************************************************************************
********************************************************************************
** Baseline regressions
** Dependent variable: Within year standardized FTNA GPA based on English,
**	Kiswahili, and Mathematics.
** Explanatory variable of interest: Private school enrolment.
** Controlling for:
**		1) gender, uncommon name, year dummies.
**		2) gender, uncommon name, year dummies, PSLE score.
**		3) gender, uncommon name, PSLE score, primary school times year FE.
**		4) gender, uncommon name, PSLE score, primary school times year FE,
**			peer effects.
**		5) gender, uncommon name, primary school times year times PSLE score FE,
**			peer effects.
**		6) gender, uncommon name, primary school times year times PSLE score FE,
**			peer effects, proxy for unobserved ability.
********************************************************************************
********************************************************************************

** Load data
use "$dataraw_path\data_ftna_publication.dta", clear


** [1] Students in private schools achieve higher FTNA scores
eststo model1: reg gpa_ftna_core_sd private female ///
	uncommon_name i.year ///
if sample==1, cl(school_id)
predict y1, xb
mat b = e(b)
gen b1 = b[1,1]

** [2] Some is caused by selection bias in regard to PSLE scores
eststo model2: reg gpa_ftna_core_sd private female ///
	uncommon_name gpa_psle_core_sd i.year ///
	if sample==1, cl(school_id)
predict y2, xb
mat b = e(b)
gen b2 = b[1,1]

** [3] Some is caused by the student's primary school
eststo model3: areg gpa_ftna_core_sd private female ///
	uncommon_name gpa_psle_core_sd ///
	if sample==1, cl(school_id) a(school_id_psle_year)
predict y3, xb
mat b = e(b)
gen b3 = b[1,1]

** [4] Some is caused by peer effects as their classmates are better.
eststo model4: areg gpa_ftna_core_sd private female ///
	uncommon_name gpa_psle_core_sd peers_score_core_sd peers_fail_share ///
	peers_as_share ///
	if sample==1, cl(school_id) a(school_id_psle_year)
predict y4, xb
mat b = e(b)
gen b4 = b[1,1]

** [5] Some is casued by different returns to school inputs
eststo model5: areg gpa_ftna_core_sd private female ///
	uncommon_name peers_score_core_sd peers_fail_share peers_as_share ///
	if sample==1, cl(school_id) a(group_id)
predict y5, xb
mat b = e(b)
gen b5 = b[1,1]

** [6] Finally, students in private schools may have higher unobserved ability
*	leading to higher learning speed. Although largely accounted for in the 
*	gpa_psle_core_sd, the gpa_psle_other_sd captures if some students performed
*	better in other subjects than the three core subjects. This variable may,
*	however, also capture complementarity between subjects and measurement error
eststo model6: areg gpa_ftna_core_sd private female ///
	uncommon_name peers_score_core_sd peers_fail_share peers_as_share ///
	gpa_psle_other_sd ///
	if sample==1, cl(school_id) a(group_id)
predict y6, xb
mat b = e(b)
gen b6 = b[1,1]


* Composite peer effect moving from 20th to 80th percentile
_pctile peers_score_core_sd if sample==1, p(20, 80)
local peer1_20 = r(r1)
local peer1_80 = r(r2)
_pctile peers_fail_share if sample==1, p(20, 80)
local peer2_20 = r(r2) // opposite for failing as it has a negative effect
local peer2_80 = r(r1) // opposite for failing as it has a negative effect
_pctile peers_as_share if sample==1, p(20, 80)
local peer3_20 = r(r1)
local peer3_80 = r(r2)
di (`peer1_80'-`peer1_20')*b[1,4] + (`peer2_80'-`peer2_20')*b[1,5] + ///
	(`peer3_80'-`peer3_20')*b[1,6]

sum peers_score_core_sd if sample==1
local peer1_sd = r(sd)
sum peers_fail_share if sample==1
local peer2_sd = r(sd)
sum peers_as_share if sample==1
local peer3_sd = r(sd)
di `peer1_sd'*b[1,4] - `peer2_sd'*b[1,5] + `peer3_sd'*b[1,6]


* Only student groups with treatment variation
preserve
	bys group_id: egen temp_private_min = min(private)
	bys group_id: egen temp_private_max = max(private)
	keep if sample==1 & temp_private_min==0 & temp_private_max==1
	// Do all regressions again (now only with treatment variation)
restore


** Output of main table
esttab model1 model2 model3 model4 model5 model6 using ///
"$out_path\table3.tex", ///
replace se stats(N r2, fmt(%12.3gc) labels("\(N\)" "\(R^2\)")) compress ///
nomtitles starlevels("" 0.01) substitute(\_ _) b(3) ///
/*KEEP*/k(private female uncommon_name gpa_psle_core_sd peers_score_core_sd ///
peers_fail_share peers_as_share gpa_psle_other_sd) ///
/*ORDER*/o(private female uncommon_name gpa_psle_core_sd peers_score_core_sd ///
peers_fail_share peers_as_share gpa_psle_other_sd) ///
/*LABELS*/varl(private "\$Private_s$" female "\$Female$" ///
uncommon_name "\$\textit{Uncommon name}$" ///
peers_score_core_sd "\$\textit{Peers PSLE}_{s}$" ///
peers_fail_share "\$\textit{Peers failed}_{s}$" ///
peers_as_share "\$\textit{Peers with A}_{s}$" ///
gpa_psle_core_sd "\$\textit{GPA}_{p} \textit{ (PSLE)}$" ///
gpa_psle_other_sd "\$\textit{GPA other}_{p} \textit{ (PSLE)}$")


** Box plot of predicted outcome variable
{
preserve
	drop if sample!=1
	expand 2, gen(temp_new1)
	expand 2 if temp_new1==1, gen(temp_new2)
	expand 2 if temp_new2==1, gen(temp_new3)
	expand 2 if temp_new3==1, gen(temp_new4)
	expand 2 if temp_new4==1, gen(temp_new5)
	gen model = 1 if temp_new1==0
	replace model = 2 if temp_new1==1 & temp_new2==0
	replace model = 3 if temp_new2==1 & temp_new3==0
	replace model = 4 if temp_new3==1 & temp_new4==0
	replace model = 5 if temp_new4==1 & temp_new5==0
	replace model = 6 if temp_new5==1
	label define model 1 "Model 1" 2 "Model 2" 3 "Model 3" 4 "Model 4" ///
	5 "Model 5" 6 "Model 6"
	label val model model
	gen temp_pub = y1 if private==0 & model==1
	gen temp_pri1 = y1-b1 if private==1 & model==1
	gen temp_pri2 = y1 if private==1 & model==1
	replace temp_pub = y2 if private==0 & model==2
	replace temp_pri1 = y2-b2 if private==1 & model==2
	replace temp_pri2 = y2 if private==1 & model==2
	replace temp_pub = y3 if private==0 & model==3
	replace temp_pri1 = y3-b3 if private==1 & model==3
	replace temp_pri2 = y3 if private==1 & model==3
	replace temp_pub = y4 if private==0 & model==4
	replace temp_pri1 = y4-b4 if private==1 & model==4
	replace temp_pri2 = y4 if private==1 & model==4
	replace temp_pub = y5 if private==0 & model==5
	replace temp_pri1 = y5-b5 if private==1 & model==5
	replace temp_pri2 = y5 if private==1 & model==5
	replace temp_pub = y6 if private==0 & model==6
	replace temp_pri1 = y6-b6 if private==1 & model==6
	replace temp_pri2 = y6 if private==1 & model==6

	graph box temp_pub temp_pri1 temp_pri2, over(model) nooutsides ///
	legend(cols(3) label(1 "Public") label(2 "Private excl. PSLP") ///
	label(3 "Private")) ytitle("Predicted outcome variable" " ") ///
	ylabel(-2(1)3) ytick(-2(0.5)3, grid gmax tstyle(major_nolabel)) ///
	graphregion(color(white)) note("")
	graph export "$out_path\figurea2.tif", replace
restore
}
*


** Allowing peer effects in the private school learning premium
{
eststo no_peers5: areg gpa_ftna_core_sd private female ///
	uncommon_name ///
	if sample==1, cl(school_id) a(group_id)

eststo no_peers6: areg gpa_ftna_core_sd private female ///
	uncommon_name gpa_psle_other_sd ///
	if sample==1, cl(school_id) a(group_id)

esttab no_peers5 no_peers6 using "$out_path\tablea4.tex", ///
replace se stats(N r2, fmt(%12.3gc) labels("\(N\)" "\(R^2\)")) compress ///
nomtitles starlevels("" 0.01) substitute(\_ _) b(3) ///
/*KEEP*/k(private female uncommon_name gpa_psle_other_sd) ///
/*ORDER*/o(private female uncommon_name gpa_psle_other_sd) ///
/*LABELS*/varl(private "\$Private_s$" female "\$Female$" ///
uncommon_name "\$\textit{Uncommon name}$" ///
gpa_psle_other_sd "\$\textit{GPA other}_{p} \textit{ (PSLE)}$")
}
*


** Changing outcome to all secondary school exam scores
{
egen gpa_ftna_all_sd = std(gpa_ftna_all) if sample==1
egen group_id2 = group(school_id_psle_year gpa_psle_all)
egen kiswahili_psle_num_sd = std(kiswahili_psle_num) if sample==1
egen english_psle_num_sd = std(english_psle_num) if sample==1
egen math_psle_num_sd = std(math_psle_num) if sample==1
egen society_psle_num_sd = std(society_psle_num) if sample==1
egen science_psle_num_sd = std(science_psle_num) if sample==1

eststo model_all1: areg gpa_ftna_all_sd private female ///
	uncommon_name peers_score_core_sd peers_fail_share peers_as_share ///
	gpa_psle_other_sd ///
	if sample==1, cl(school_id) a(group_id)

eststo model_all2: areg gpa_ftna_all_sd private female ///
	uncommon_name peers_score_core_sd peers_fail_share peers_as_share ///
	if sample==1, cl(school_id) a(group_id2)

eststo model_all3: areg gpa_ftna_all_sd private female ///
	uncommon_name peers_score_core_sd peers_fail_share peers_as_share ///
	kiswahili_psle_num_sd english_psle_num_sd math_psle_num_sd ///
	society_psle_num_sd science_psle_num_sd ///
	if sample==1, cl(school_id) a(school_id_psle_year)

capture drop gpa_ftna_all_sd group_id2 kiswahili_psle_num_sd ///
	english_psle_num_sd math_psle_num_sd society_psle_num_sd science_psle_num_sd

esttab model_all1 model_all2 model_all3 using ///
"$out_path\tablea5.tex", ///
replace se stats(N r2, fmt(%12.3gc) labels("\(N\)" "\(R^2\)")) compress ///
nomtitles starlevels("" 0.01) substitute(\_ _) b(3) ///
/*KEEP*/k(private gpa_psle_other_sd kiswahili_psle_num_sd ///
english_psle_num_sd math_psle_num_sd society_psle_num_sd ///
science_psle_num_sd) ///
/*ORDER*/o(private gpa_psle_other_sd kiswahili_psle_num_sd ///
english_psle_num_sd math_psle_num_sd society_psle_num_sd ///
science_psle_num_sd) ///
/*LABELS*/varl(private "\$Private_s$" ///
gpa_psle_other_sd "\$\textit{GPA other}_{p}$" ///
kiswahili_psle_num_sd "\$\textit{Kiswahili score}_{p}$" ///
english_psle_num_sd "\$\textit{English score}_{p}$" ///
math_psle_num_sd "\$\textit{Mathematics score}_{p}$" ///
society_psle_num_sd "\$\textit{Community knowledge score}_{p}$" ///
science_psle_num_sd "\$\textit{Science score}_{p}$")
}
*


** Descriptives for students in subgroups with private school variation
{
bys group_id: egen temp_private_min = min(private)
bys group_id: egen temp_private_max = max(private)
gen variation = 1 if temp_private_min==0 & temp_private_max==1
	
eststo all_pub: estpost summarize gpa_ftna_core gpa_psle_core gpa_psle_other ///
	private_psle private female uncommon_name peers_score_core ///
	peers_fail_share peers_as_share cohort16 cohort17 religion bible_school ///
	islam_school female_school male_school ///
	if sample==1 & public==1, meanonly

eststo all_pri: estpost summarize gpa_ftna_core gpa_psle_core gpa_psle_other ///
	private_psle private female uncommon_name peers_score_core ///
	peers_fail_share peers_as_share cohort16 cohort17 religion bible_school ///
	islam_school female_school male_school ///
	if sample==1 & public==0, meanonly

eststo var_pub: estpost summarize gpa_ftna_core gpa_psle_core gpa_psle_other ///
	private_psle private female uncommon_name peers_score_core ///
	peers_fail_share peers_as_share cohort16 cohort17 religion bible_school ///
	islam_school female_school male_school ///
	if sample==1 & public==1 & variation==1, meanonly

eststo var_pri: estpost summarize gpa_ftna_core gpa_psle_core gpa_psle_other ///
	private_psle private female uncommon_name peers_score_core ///
	peers_fail_share peers_as_share cohort16 cohort17 religion bible_school ///
	islam_school female_school male_school ///
	if sample==1 & public==0 & variation==1, meanonly

capture drop temp_private_min temp_private_max variation

esttab all_pub all_pri var_pub var_pri using ///
"$out_path\tablea6.tex", replace ///
cells("mean(pattern(1 1 1 1 0) fmt(3)) b(pattern(0 0 0 0 1) fmt(3)) se(pattern(0 0 0 0 1) fmt(3))") ///
varlabels(gpa_ftna_core "GPA FTNA" gpa_psle_core "GPA PSLE" ///
gpa_psle_other "GPA PSLE other" private_psle "Private primary" ///
private "Private secondary" /*school_size "Secondary school size"*/ ///
peers_score_core "Peers PSLE" peers_fail_share "Peers failed (PSLE)" ///
peers_as_share "Peers with A (PSLE)" female "Female" ///
religion "Religious courses" uncommon_name "Uncommon name" ///
bible_school "Bible course" islam_school "Islam course" ///
cohort16 "Cohort 2016" cohort17 "Cohort 2017" ///
female_school "Girls only (secondary)" male_school "Boys only (secondary)")
}
*

* Drop stored estimates
capture drop _est_* y1 y2 y3 y4 y5 y6 b1 b2 b3 b4 b5 b6
